### Preliminary ---------------------------------------------------------------
# Load libraries
library(car)
library(fixest)
library(ggcorrplot)
library(modelsummary)
library(tidyverse)

# Load data
leg_df <- read_csv("Legislator and District Data.csv")

# Set options
options("modelsummary_format_numeric_latex" = "plain")

### Figure 3: District Demographics and Asian Representation--------------------
# Run logistic regression models
logit_1 <- feglm(SlegAsian ~ PctAsian + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther 
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_2 <- feglm(SlegAsian ~ PctAsianNative + PctAsianNaturalized + PctAsianNoncitizen + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_3 <- feglm(SlegAsian ~ PctLanguageEnglish + PctLanguageAsian +
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther  
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_4 <- feglm(SlegAsian ~ PctAsianAlone + PctAsianTwoPlus + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther  
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_5 <- feglm(SlegAsian ~ PctAsianCollege + PctAsianNoncollege + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_6 <- feglm(SlegAsian ~ PctAsianLowInc + PctAsianMidInc + PctAsianHighInc + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther
                   | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

# Calculate p-values for difference between coefficients referenced in text
linearHypothesis(logit_3, "PctLanguageEnglish = PctLanguageAsian")
linearHypothesis(logit_4, "PctAsianAlone = PctAsianTwoPlus")
linearHypothesis(logit_6, "PctAsianLowInc = PctAsianMidInc")

# Put estimates into dataframe for plotting
graphing_data <- data.frame(
  variables = c("% Asian",  "% Asian (Native-Born)", "% Asian (Naturalized)", 
                "% Asian (Noncitizen)",
                "% Asian (English Language)", "% Asian (Other Language)",
                "% Asian (Single Race)", "% Asian (Multiracial)",
                "% Asian (College)", "% Asian (Noncollege)",
                "% Asian (Low Income)", "% Asian (Middle Income)", 
                "% Asian (High Income)"),
  group = c("Baseline",  "Citizenship", "Citizenship", "Citizenship",
            "Language", "Language", "Race", "Race",
            "Education", "Education", "Income", "Income", "Income"),
  coefs = c(logit_1$coefficients[1], 
            logit_2$coefficients[1], logit_2$coefficients[2], logit_2$coefficients[3], 
            logit_3$coefficients[1], logit_3$coefficients[2], 
            logit_4$coefficients[1], logit_4$coefficients[2],
            logit_5$coefficients[1], logit_5$coefficients[2], 
            logit_6$coefficients[1], logit_6$coefficients[2], logit_6$coefficients[3]),
  ses = c(logit_1$se[1], logit_2$se[1], logit_2$se[2], 
          logit_2$se[3], logit_3$se[1], logit_3$se[2],
          logit_4$se[1], logit_4$se[2], logit_5$se[1], 
          logit_5$se[2], logit_6$se[1], logit_6$se[2], 
          logit_6$se[3])) %>%
  mutate(lower = coefs - 1.96 * ses,
         upper = coefs + 1.96 * ses,
         variables = factor(variables, 
                            levels = rev(c("% Asian",  
                                           "% Asian (Native-Born)", "% Asian (Naturalized)", "% Asian (Noncitizen)",
                                           "% Asian (English Language)", "% Asian (Other Language)",
                                           "% Asian (Multiracial)", "% Asian (Single Race)", 
                                           "% Asian (College)", "% Asian (Noncollege)",
                                           "% Asian (Low Income)", "% Asian (Middle Income)", "% Asian (High Income)"))),
         group = factor(group,
                        levels = rev(c("Baseline", "Citizenship", "Language", 
                                       "Race", "Education", "Income"))))


# Plot coefficient estimates 
ggplot(graphing_data, aes(x = coefs, y = variables)) +
  geom_point(aes(shape = group)) +
  geom_vline(aes(xintercept = 0), lty = "dashed") +
  geom_errorbarh(aes(xmin = lower, xmax = upper),
                 height = 0) +
  scale_x_continuous(limits = c(-0.8, 1.05)) +
  scale_y_discrete(limits = c(levels(graphing_data$variables)[1:3], "Empty1",
                              levels(graphing_data$variables)[4:5], "Empty2",
                              levels(graphing_data$variables)[6:7], "Empty3",
                              levels(graphing_data$variables)[8:9], "Empty4",
                              levels(graphing_data$variables)[10:12], "Empty5",
                              levels(graphing_data$variables)[13]),
                              labels = c("Empty1" = "", "Empty2" = "",
                                         "Empty3" = "", "Empty4" = "",
                                         "Empty5" = "")) +
  scale_color_brewer(palette = "Dark2",
                     guide = guide_legend(reverse = TRUE)) + 
  scale_shape_manual(values =  rev(c(19, 15, 17, 1, 0, 2)),
                     guide = guide_legend(reverse = TRUE)) + 
  labs(x = "Coefficient from Logistic Regression Model",
       y = NULL, color = NULL, shape = NULL) + 
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
  )

### Table S6: Full Set of Coefficients for Figure 3-----------------------------
# Put figure regression estimates into table form
modelsummary(list(logit_1, logit_2, logit_3, logit_4, logit_5, logit_6),
             output = "latex",
             title = "Full Regression Estimates for Figure 3",
             fmt=3,
             stars= c('+' = 0.10,
                      '*' = .05,
                      '**' = .01),
             statistic = "({std.error})",
             estimate  = "{estimate}{stars}",
             coef_map = c("PctAsian" = "% Population Asian",
                          "PctAsianNaturalized" = "% Population Naturalized Asian",
                          "PctAsianNoncitizen" = "% Population Noncitizen Asian",
                          "PctAsianNative" = "% Population Native-Born Asian",
                          "PctLanguageEnglish" = "% Population English-Speaking Asian",
                          "PctLanguageAsian" = "% Population Non-English-Speaking Asian",
                          "PctAsianAlone" = "% Population Asian Alone",
                          "PctAsianTwoPlus" = "% Population Multiracial Asian",
                          "PctAsianCollege" = "% Population College-Educated Asian",
                          "PctAsianNoncollege" = "% Population Non-College-Educated Asian",
                          "PctAsianLowInc" = "% Population Low-Income Asian",
                          "PctAsianMidInc" = "% Population Middle-Income Asian", 
                          "PctAsianHighInc" = "% Population High-Income Asian",
                          "IdeolMRP" = "District Conservatism",
                          "MedianIncome" = "Median Income",
                          "PctCollege" = "% Population with College Degree",
                          "PctBlack" = "% Population Black",
                          "PctHispanic" = "% Population Hispanic",
                          "PctOther" = "% Population Other Nonwhite"),
             note = "+p<0.10; *p<0.05; **p<0.01",
             gof_map = c("nobs", "aic", "rmse"),
             add_rows = data.frame(Term = c("State-Election Year FEs"),
                                   Logit_1 = "Y", Logit_2 = "Y", Logit_3 = "Y",
                                   Logit_4 = "Y", Logit_5 = "Y", Logit_6 = "Y")
)

### Figure S3: Correlation Matrix For Demographic Variables --------------------
leg_df %>%
  select(PctAsianNative, PctAsianNaturalized, PctAsianNoncitizen,
         PctLanguageEnglish, PctLanguageAsian, PctAsianTwoPlus, PctAsianAlone,
         PctAsianCollege, PctAsianNoncollege, PctAsianLowInc, PctAsianMidInc,
         PctAsianHighInc) %>%
  select(12:1) %>% 
  cor(use = "complete.obs") %>%
  ggcorrplot(type = "upper")

### Figure S7 and Table S7: Co-Ethnic Versus Pan-Ethnic Analysis ---------------
# Estimate logistic regressions
logit_1 <- feglm(SlegChinese ~ PctAsian + PctChinese + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther 
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_2 <- feglm(SlegIndian ~ PctAsian + PctIndian + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther 
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_3 <- feglm(SlegJapanese ~ PctAsian + PctJapanese + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther 
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_4 <- feglm(SlegFilipino ~ PctAsian + PctFilipino + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther 
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

logit_5 <- feglm(SlegKorean ~ PctAsian + PctKorean + 
                   IdeolMRP + MedianIncome + PctCollege + 
                   PctBlack + PctHispanic + PctOther 
                 | StateAbb^Year,
                 vcov = "cluster",
                 data = leg_df,
                 family = binomial(link = "logit"))

# Put table into form for graphing
graphing_data <-
  data.frame(
    Representative = c(rep("Chinese", 2), rep("Indian", 2), rep("Japanese", 2),
                       rep("Filipino", 2), rep("Korean", 2)),
    Population = c("Pan-Ethnic Asian Population", "Co-Ethnic Population",
                   "Pan-Ethnic Asian Population", "Co-Ethnic Population",
                   "Pan-Ethnic Asian Population", "Co-Ethnic Population",
                   "Pan-Ethnic Asian Population", "Co-Ethnic Population",
                   "Pan-Ethnic Asian Population", "Co-Ethnic Population"),
    Estimate = c(coef(logit_1)[c(1, 2)], coef(logit_2)[c(1, 2)],
                 coef(logit_3)[c(1, 2)], coef(logit_4)[c(1, 2)],
                 coef(logit_5)[c(1, 2)]),
    SE = c(se(logit_1)[c(1, 2)], se(logit_2)[c(1, 2)],
           se(logit_3)[c(1, 2)], se(logit_4)[c(1, 2)],
           se(logit_5)[c(1, 2)])
  ) %>%
  mutate(lower = Estimate - 1.96 * SE,
         upper = Estimate + 1.96 * SE,
         Representative = factor(Representative,
                                 levels = c("Japanese", "Filipino", "Korean",
                                            "Indian", "Chinese")))

# Create graph
ggplot(graphing_data, aes(x = Estimate, y = Representative, color = Population)) +
  geom_point(aes(shape = Population),
             position = position_dodge(width = 0.5)) +
  geom_vline(aes(xintercept = 0), lty = "dashed") +
  geom_errorbarh(aes(xmin = lower, xmax = upper),
                 height = 0,
                 position = position_dodge(width = 0.5)) +
  scale_color_manual(values = c("black", "gray50")) + 
  labs(x = "Coefficient from Logistic Regression Model",
       y = "Representative \n Ethnicity", color = NULL, shape = NULL) + 
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
  )

# Table S7: Full Set of Coefficients for Figure S6 ----------------------------
modelsummary(list(logit_1, logit_2, logit_3, logit_4, logit_5),
             output = "latex",
             title = "Full Regression Estimates for Figure S7",
             fmt=3,
             stars= c('*' = .05,
                      '**' = .01),
             statistic = "({std.error})",
             estimate  = "{estimate}{stars}",
             coef_map = c("PctChinese" = "% Population Same Ethnicity",
                          "PctIndian" = "% Population Same Ethnicity",
                          "PctKorean" = "% Population Same Ethnicity",
                          "PctFilipino" = "% Population Same Ethnicity",
                          "PctJapanese" = "% Population Same Ethnicity",
                          "PctAsian" = "% Population Any Asian",
                          "IdeolMRP" = "District Conservatism",
                          "MedianIncome" = "Median Income",
                          "PctCollege" = "% Population with College Degree",
                          "PctBlack" = "% Population Black",
                          "PctHispanic" = "% Population Hispanic",
                          "PctOther" = "% Population Other Nonwhite"),
             note = "*p<0.05; **p<0.01",
             gof_map = c("nobs", "rmse"),
             add_rows = data.frame(Term = c("Ethnicity", "State-Election Year FEs"),
                                   Logit_1 = c("Chinese", "Y"),
                                   Logit_2 = c("Indian", "Y"), 
                                   Logit_3 = c("Korean", "Y"),
                                   Logit_4 = c("Filipino", "Y"), 
                                   Logit_6 = c("Japanese", "Y"))
)
